{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# 執行 Phi-3 模型在 MLFlow\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "---------\n",
        "這本筆記本描述了在 Phi-3 mini 4K Instruct 範例中使用 Transformer 管道。\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Importing required packages\n",
        "import mlflow\n",
        "import transformers"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### 從 HuggingFace 選擇 Phi-3 Mini 4K\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "9f1bb22f43e44552881ddca9ec96f20d",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
          ]
        }
      ],
      "source": [
        "# Selecting Phi-3 HuggingFace model\n",
        "pipeline = transformers.pipeline(\n",
        "    task = \"text-generation\",\n",
        "    model = \"microsoft/Phi-3-mini-4k-instruct\"\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### 產生 MLFlow 物件\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Setting up MLflow model's configuration\n",
        "model_config = {\n",
        "    \"max_length\": 300,\n",
        "    \"truncation\": True,\n",
        "    \"include_prompt\": False,\n",
        "}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Generating MLflow artifact from HuggingFace model\n",
        "with mlflow.start_run():\n",
        "    model_info = mlflow.transformers.log_model(\n",
        "        transformers_model = pipeline,\n",
        "        artifact_path = \"phi3-mlflow-model\",\n",
        "        model_config = model_config,\n",
        "        task = \"llm/v1/chat\"\n",
        "    )"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### 執行 Phi-3 作為 MLFlow 模型\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "7e3557733e55422a81b3a122072830e6",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/34 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
          ]
        }
      ],
      "source": [
        "# Loading Phi-3 MLFlow model\n",
        "loaded_model = mlflow.pyfunc.load_model(\n",
        "    model_uri = model_info.model_uri\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "inputs: \n",
              "  ['messages': Array({content: string (required), name: string (optional), role: string (required)}) (required), 'temperature': double (optional), 'max_tokens': long (optional), 'stop': Array(string) (optional), 'n': long (optional), 'stream': boolean (optional)]\n",
              "outputs: \n",
              "  ['id': string (required), 'object': string (required), 'created': long (required), 'model': string (required), 'choices': Array({finish_reason: string (required), index: long (required), message: {content: string (required), name: string (optional), role: string (required)} (required)}) (required), 'usage': {completion_tokens: long (required), prompt_tokens: long (required), total_tokens: long (required)} (required)]\n",
              "params: \n",
              "  None"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "loaded_model.metadata.signature"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {},
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "You are not running the flash-attention implementation, expect numerical differences.\n",
            "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[{'id': 'f9c2ca5c-b684-4a77-8a43-356b671fd797', 'object': 'chat.completion', 'created': 1718709547, 'model': 'microsoft/Phi-3-mini-4k-instruct', 'usage': {'prompt_tokens': 11, 'completion_tokens': 73, 'total_tokens': 84}, 'choices': [{'index': 0, 'finish_reason': 'stop', 'message': {'role': 'assistant', 'content': 'The capital of Spain is Madrid. It is the largest city in Spain and serves as the political, economic, and cultural center of the country. Madrid is located in the center of the Iberian Peninsula and is known for its rich history, art, and architecture, including the Royal Palace, the Prado Museum, and the Plaza Mayor.'}}]}]\n"
          ]
        }
      ],
      "source": [
        "# Testing Phi-3 MLFlow model's inference\n",
        "messages = [{\"role\": \"user\", \"content\": \"What is the capital of Spain?\"}]\n",
        "response = loaded_model.predict(\n",
        "    {\"messages\": messages}\n",
        ")\n",
        "print(response)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Question: What is the capital of Spain?\n",
            "\n",
            "Response: The capital of Spain is Madrid. It is the largest city in Spain and serves as the political, economic, and cultural center of the country. Madrid is located in the center of the Iberian Peninsula and is known for its rich history, art, and architecture, including the Royal Palace, the Prado Museum, and the Plaza Mayor.\n",
            "\n",
            "Usage: {'prompt_tokens': 11, 'completion_tokens': 73, 'total_tokens': 84}\n"
          ]
        }
      ],
      "source": [
        "# \"Beautified\" output\n",
        "print(f\"Question: {messages[0]['content']}\\n\")\n",
        "print(f\"Response: {response[0]['choices'][0]['message']['content']}\\n\")\n",
        "print(f\"Usage: {response[0]['usage']}\")"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}